In [1]:
import re
import pandas as pd
import BQhelper as bq
import inspectTable
bq.project = "mlab-sandbox"
In [3]:
# Replicate/audit inventories from 2020/02/28
# measurement-lab.ndt.downloads as Inventory_measurement_lab_ndt_downloads
# measurement-lab.ndt.uploads AS Inventory_measurement_lab_ndt_uploads
# mlab-oti.base_tables.ndt AS Inventory_mlab_oit_base_tables
basicIQ="""
WITH
CurrTab AS (
SELECT
CAST(log_time AS DATE) as test_date,
REPLACE(REGEXP_EXTRACT(task_filename, "-(mlab[1-4]-[a-z][a-z][a-z][0-9][0-9t])-"), "-", ".") AS hostname,
connection_spec.data_direction AS direction,
COUNT( DISTINCT test_id) AS current_tests
FROM `{table}`
GROUP BY test_date, hostname, direction
),
RawData AS (
SELECT * FROM {table} LIMIT 100
),
deltaTab AS (
SELECT *, CurrTab.current_tests-legacy_tests AS delta
FROM CurrTab
JOIN {archivedTable} AS archive
USING (test_date, hostname, direction)
WHERE CurrTab.current_tests != archive.legacy_tests
)
# select * FROM DeltaTab LIMIT 10
SELECT
SUM(legacy_tests) AS legacy,
SUM(current_tests) AS cur,
SUM(delta) AS netD,
SUM(delta) / SUM(legacy_tests) AS ratio
FROM deltaTab
"""
r=bq.DataFrameQuery(basicIQ, table='mlab-oti.base_tables.ndt', archivedTable='mlab-sandbox.mattmathis.Inventory_mlab_oit_base_tables')
print (r)